1 Introduction

The objective of this notebook is to perform a computational dissection of the main differentiation pathway that CD4-T cells follow using the most variable features of our dataset.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(Signac)
library(GenomicRanges)
library(dplyr)
library(ggplot2)
library(tidyverse)
library(data.table)
library(ggpubr)
library(xlsx)
library(plyr)
library(stringr)

2.2 Parameters

cell_type <- "CD4_T"

path_to_obj <- paste0(
  here::here("scATAC-seq/results/R_objects/level_5/"),
  cell_type,
  "/04.",
  cell_type,
  "_integration_peak_calling_level_5.rds",
  sep = ""
)


color_palette <-  c("#1CFFCE", "#90AD1C", "#C075A6", "#85660D", 
                    "#5A5156", "#AA0DFE", "#F8A19F", "#F7E1A0",
                    "#1C8356", "#FEAF16", "#822E1C", "#C4451C", 
                    "#1CBE4F", "#325A9B", "#F6222E", "#FE00FA",
                    "#FBE426", "#16FF32",  "black",  "#3283FE",
                    "#B00068", "#DEA0FD", "#B10DA1", "#E4E1E3", 
                    "#90AD1C", "#FE00FA", "#85660D", "#3B00FB", 
                    "#822E1C", "coral2",  "#1CFFCE", "#1CBE4F", 
                    "#3283FE", "#FBE426", "#F7E1A0", "#325A9B", 
                    "#2ED9FF", "#B5EFB5", "#5A5156", "#DEA0FD",
                    "#FEAF16", "#683B79", "#B10DA1", "#1C7F93", 
                    "#F8A19F", "dark orange", "#FEAF16", 
                    "#FBE426", "Brown")

2.3 Functions

plot_dim <- function(seurat, group,color_palette){
  DimPlot(seurat, 
  group.by = group,
  cols = color_palette,
  pt.size = 0.1)}

3 CD4-T cells data

seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat 
## 93116 features across 16383 samples within 1 assay 
## Active assay: peaks_level_5 (93116 features, 92407 variable features)
##  3 dimensional reductions calculated: umap, lsi, harmony
seurat_peaks <- seurat@assays$peaks_level_5@ranges

#pdf(file = here::here("scATAC-seq/results/plots/CD4-T/UMAP_final_annotation.pdf"), 
 #   width = 6, height = 4)

print(plot_dim(seurat, group = "annotation_paper",color_palette))

#dev.off()

3.1 Grouping the cells in Non-Tfh & Tfh groups

At low level of resolution, we want to detect the main epigenomic changes between Non-Tfh vs Tfh. For this reason, we decide to group the cells in 4 clusters: Non-Tfh, Tfh, Central Memory and Naive.

seurat@meta.data <- seurat@meta.data %>% mutate(Group =
  case_when(annotation_paper == "Naive" ~ "Naive",
    annotation_paper == "CM Pre-non-Tfh" ~ "Central Memory",
    annotation_paper == "CM PreTfh" ~ "Central Memory",
    annotation_paper == "T-Trans-Mem" ~ "Non-Tfh",
    annotation_paper == "T-Eff-Mem" ~ "Non-Tfh",
    annotation_paper == "T-helper" ~ "Non-Tfh",
    annotation_paper == "Tfh T:B border" ~ "Tfh",
    annotation_paper == "Tfh-LZ-GC" ~ "Tfh",
    annotation_paper == "GC-Tfh-SAP" ~ "Tfh",
    annotation_paper == "GC-Tfh-0X40" ~ "Tfh",
    annotation_paper == "Tfh-Mem" ~ "Tfh",
    annotation_paper == "Memory T cells" ~ "Non-Tfh",
    annotation_paper == "Eff-Tregs" ~ "Non-Tfh",
    annotation_paper == "non-GC-Tf-regs" ~ "Non-Tfh",
    annotation_paper == "GC-Tf-regs" ~ "Non-Tfh"))

#pdf(file = here::here("scATAC-seq/results/plots/CD4-T/UMAP_main_groups.pdf"), 
 #   width = 6, 
  #  height = 4)

print(plot_dim(seurat, group = "Group",
               color_palette = c("black", "gray", "red", "yellow")))

#dev.off()

4 Latent semantic indexing analysis

We have seen that the relation of the second and the third components of the LSI reduction reflects the cell-fate decision of the naïve CD4-T cell towards Non-Tfh or Tfh. In order to verify this effect in detail, we decided to extract the feature.loadings of these two components and filter by the high variable peaks that could be explain this differention process. Using those peaks, we can re-compute the UMAP and see how the trajectory appears clearer.

a1 <- DimPlot(
  seurat, 
  group.by = "Group",
  cols = color_palette,
  pt.size = 0.4,
  reduction = "lsi",
  dims = 2:3
)

a2 <- DimPlot(
  seurat, 
  group.by = "annotation_paper",
  cols = color_palette,
  pt.size = 0.4,
  reduction = "lsi",
  dims = 2:3
)

a2

a1 

loading_2_3 <- seurat@reductions$lsi@feature.loadings[,c("LSI_2","LSI_3")]
loading_2_3.df <- as.data.frame(seurat@reductions$lsi@feature.loadings[,c("LSI_2","LSI_3")])
loading_2_3.melt <- melt(loading_2_3)

ggviolin(loading_2_3.melt, "Var2", "value", 
         fill = "Var2", palette = c("#00AFBB", "#E7B800"),
         add = "boxplot", add.params = list(fill = "white")) + 
         geom_hline(yintercept = c(-0.015,0.015))

# filter dataframe to get data to be highligheted
loadings_high <- loading_2_3.melt[loading_2_3.melt$value > 0.015 | loading_2_3.melt$value < -0.015,]$Var1

highlight_df <- loading_2_3.df[loadings_high,]

loading_2_3.df %>% 
  ggplot(aes(x=LSI_2,y=LSI_3)) + 
  geom_point(alpha=0.3) +
  geom_point(data=highlight_df, 
             aes(x=LSI_2,y=LSI_3), 
             color='red',
             size=2) + theme_minimal() 

We can re-compute the UMAP using the high variable features previously defined.

loadings_high_UMAP <- RunUMAP(object = seurat, 
                features = loadings_high)

#saveRDS(object = loadings_high_UMAP,
 #       file = here::here("scATAC-seq/results/plots/CD4-T/files_plots/loadings_high_UMAP.rds"))

#pdf(file = here::here("scATAC-seq/results/plots/CD4-T/UMAP_HVF_4groups.pdf"), 
 #   width = 6, height = 4)

DimPlot(object = loadings_high_UMAP, 
        label = F,group.by = "Group",
        pt.size = 0.3,
        cols = c("black", "gray", "red", "yellow"))

#dev.off()

DimPlot(ncol = 4, object = loadings_high_UMAP, 
        label = F,split.by = "Group",
        pt.size = 0.3,
        cols = color_palette) + NoLegend()

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Motif_TF/lib/libopenblasp-r0.3.10.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] plyr_1.8.6           xlsx_0.6.5           ggpubr_0.4.0         data.table_1.14.0    forcats_0.5.0        stringr_1.4.0        purrr_0.3.4          readr_1.4.0          tidyr_1.1.3          tibble_3.1.2         tidyverse_1.3.0      ggplot2_3.3.5        dplyr_1.0.7          GenomicRanges_1.40.0 GenomeInfoDb_1.24.2  IRanges_2.22.1       S4Vectors_0.26.0     BiocGenerics_0.34.0  Signac_1.2.1         SeuratObject_4.0.2   Seurat_4.0.3         BiocStyle_2.16.1    
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.1             reticulate_1.20        tidyselect_1.1.1       htmlwidgets_1.5.3      grid_4.0.3             docopt_0.7.1           BiocParallel_1.22.0    Rtsne_0.15             munsell_0.5.0          codetools_0.2-17       ica_1.0-2              future_1.21.0          miniUI_0.1.1.1         withr_2.4.2            colorspace_2.0-2       knitr_1.30             rstudioapi_0.11        ROCR_1.0-11            ggsignif_0.6.0         tensor_1.5             rJava_0.9-13           listenv_0.8.0          labeling_0.4.2         slam_0.1-47            GenomeInfoDbData_1.2.3 polyclip_1.10-0        farver_2.1.0           rprojroot_2.0.2        parallelly_1.26.1      vctrs_0.3.8            generics_0.1.0         xfun_0.18              lsa_0.73.2             ggseqlogo_0.1          R6_2.5.0               bitops_1.0-7           spatstat.utils_2.2-0   assertthat_0.2.1       promises_1.2.0.1       scales_1.1.1           gtable_0.3.0           globals_0.14.0         goftest_1.2-2          rlang_0.4.11           RcppRoll_0.3.0         splines_4.0.3          rstatix_0.6.0          lazyeval_0.2.2         spatstat.geom_2.2-0    broom_0.7.2            BiocManager_1.30.10   
##  [52] yaml_2.2.1             reshape2_1.4.4         abind_1.4-5            modelr_0.1.8           backports_1.1.10       httpuv_1.6.1           tools_4.0.3            bookdown_0.21          ellipsis_0.3.2         spatstat.core_2.2-0    RColorBrewer_1.1-2     ggridges_0.5.3         Rcpp_1.0.6             zlibbioc_1.34.0        RCurl_1.98-1.2         rpart_4.1-15           deldir_0.2-10          pbapply_1.4-3          cowplot_1.1.1          zoo_1.8-9              haven_2.3.1            ggrepel_0.9.1          cluster_2.1.0          here_1.0.1             fs_1.5.0               magrittr_2.0.1         RSpectra_0.16-0        scattermore_0.7        openxlsx_4.2.4         lmtest_0.9-38          reprex_0.3.0           RANN_2.6.1             SnowballC_0.7.0        fitdistrplus_1.1-5     matrixStats_0.59.0     xlsxjars_0.6.1         hms_0.5.3              patchwork_1.1.1        mime_0.11              evaluate_0.14          xtable_1.8-4           rio_0.5.16             sparsesvd_0.2          readxl_1.3.1           gridExtra_2.3          compiler_4.0.3         KernSmooth_2.23-17     crayon_1.4.1           htmltools_0.5.1.1      mgcv_1.8-33            later_1.2.0           
## [103] lubridate_1.7.9        DBI_1.1.0              tweenr_1.0.1           dbplyr_1.4.4           MASS_7.3-53            Matrix_1.3-4           car_3.0-10             cli_3.0.0              igraph_1.2.6           pkgconfig_2.0.3        foreign_0.8-80         plotly_4.9.4.1         spatstat.sparse_2.0-0  xml2_1.3.2             XVector_0.28.0         rvest_0.3.6            digest_0.6.27          sctransform_0.3.2      RcppAnnoy_0.0.18       spatstat.data_2.1-0    Biostrings_2.56.0      rmarkdown_2.5          cellranger_1.1.0       leiden_0.3.8           fastmatch_1.1-0        uwot_0.1.10            curl_4.3.2             shiny_1.6.0            Rsamtools_2.4.0        lifecycle_1.0.0        nlme_3.1-150           jsonlite_1.7.2         carData_3.0-4          viridisLite_0.4.0      fansi_0.5.0            pillar_1.6.1           lattice_0.20-41        fastmap_1.1.0          httr_1.4.2             survival_3.2-7         glue_1.4.2             qlcMatrix_0.9.7        zip_2.1.1              png_0.1-7              ggforce_0.3.2          stringi_1.6.2          blob_1.2.1             irlba_2.3.3            future.apply_1.7.0